library(data.table)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggpubr)
## Loading required package: ggplot2
library(Biostrings)
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
##     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:data.table':
## 
##     shift
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.4     ✔ stringr 1.4.0
## ✔ readr   2.0.1     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ dplyr::between()         masks data.table::between()
## ✖ Biostrings::collapse()   masks IRanges::collapse(), dplyr::collapse()
## ✖ BiocGenerics::combine()  masks dplyr::combine()
## ✖ purrr::compact()         masks XVector::compact()
## ✖ IRanges::desc()          masks dplyr::desc()
## ✖ S4Vectors::expand()      masks tidyr::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ S4Vectors::first()       masks dplyr::first(), data.table::first()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ dplyr::last()            masks data.table::last()
## ✖ purrr::reduce()          masks IRanges::reduce()
## ✖ S4Vectors::rename()      masks dplyr::rename()
## ✖ XVector::slice()         masks IRanges::slice(), dplyr::slice()
## ✖ purrr::transpose()       masks data.table::transpose()
library(foreach)
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
library(stringdist)
## 
## Attaching package: 'stringdist'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(doParallel)
## Loading required package: iterators

Useful functions

# Function to provide a closest match. Used to match HLA Alleles across mixed output styles.
ClosestMatch2 = function(string, stringVector){

  stringVector[amatch(string, stringVector, maxDist=Inf)]

}

Aims

Question: If we look at ratios of binding (agretopicity), stability, and/or fraction of hydrophobicity, do we see any evidence of immune escape?

Method

Read in Wuhan binding dataset

WUHAN_PEPTIDES_BINDING_SCORES=readRDS("ORIGINAL_PEPTIDES_BINDING_SCORES_V2.rds")
WUHAN_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 15
##   Peptide      Predicted_Bindi… `Thalf(h)` `EL-score` EL_Rank `BA-score` BA_Rank
##   <chr>        <chr>            <chr>      <chr>      <chr>   <chr>      <chr>  
## 1 AADLDDFSKQL  HLA-A0101,HLA-A… 0.2226,0.… 0.0085,0.… 5.3632… 0.0571,0.… 19.165…
## 2 AAGLEAPFLY   HLA-A0101,HLA-A… 0.9656,0.… 0.2728,4e… 0.5874… 0.2922,0.… 0.7888…
## 3 AAGLEAPFLYLY HLA-A0101,HLA-A… 0.9528,0.… 0.0694,1e… 1.6217… 0.2495,0.… 1.0825…
## 4 AAISDYDYY    HLA-A0101,HLA-A… 1.0031,0.… 0.3568,4e… 0.4523… 0.3202,0.… 0.6298…
## 5 AAISDYDYYR   HLA-A0101,HLA-A… 0.3461,0.… 0.0039,4e… 8.4074… 0.1012,0.… 6.4802…
## 6 AAVYRINW     HLA-A0101,HLA-A… 0.3428,0.… 4e-04,0,0… 30.964… 0.0245,0.… 62.656…
## # … with 8 more variables: Ave <chr>, Binder <chr>, nM_41 <chr>, nM <chr>,
## #   Aff_Binder <chr>, Length <int>, HydrophobicCount <int>, HydroFraction <dbl>
WUHAN_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 1380

For 9/10mers of the wuhan originals, what are dominant HLAs?

WUHAN_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(Peptide)) %>% filter(Length%in% c(9,10))%>% select(Peptide, Predicted_Binding, Binder,Length) %>% separate_rows_(cols=c("Predicted_Binding", "Binder"), sep=",") %>% distinct() %>% group_by(Predicted_Binding,Binder,Length) %>%mutate(Length = as.character(Length))%>%
    dplyr::summarise(n=n())%>% filter(Binder == "BINDER")%>% arrange(desc(n))%>%
    ggbarplot(x="Predicted_Binding",y="n",fill="Length",position = position_dodge())+coord_flip()
## `summarise()` has grouped output by 'Predicted_Binding', 'Binder'. You can override using the `.groups` argument.

WUHAN_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(Peptide)) %>% filter(Length%in% c(9,10))%>% select(Peptide, Predicted_Binding, Binder,Length) %>% separate_rows_(cols=c("Predicted_Binding", "Binder"), sep=",") %>% distinct() %>% filter(Predicted_Binding == "HLA-A0201")%>% group_by(Predicted_Binding,Binder,Length) %>%mutate(Length = as.character(Length))%>%
    dplyr::summarise(n=n())%>% filter(Binder == "BINDER")%>% arrange(desc(n))
## `summarise()` has grouped output by 'Predicted_Binding', 'Binder'. You can override using the `.groups` argument.
## # A tibble: 2 × 4
## # Groups:   Predicted_Binding, Binder [1]
##   Predicted_Binding Binder Length     n
##   <chr>             <chr>  <chr>  <int>
## 1 HLA-A0201         BINDER 9        162
## 2 HLA-A0201         BINDER 10        48

Read in Omicron mutanta binding dataset

OMICRON_PEPTIDES_BINDING_SCORES=readRDS("OMICRON_PEPTIDES_BINDING_SCORES_V2.rds")
OMICRON_PEPTIDES_BINDING_SCORES %>% head
## # A tibble: 6 × 13
##   VariantAlignment MT_Predicted_Bind… `MT_Thalf(h)`   `MT_EL-score`  MT_EL_Rank 
##   <chr>            <chr>              <chr>           <chr>          <chr>      
## 1 AEYVNNSY         HLA-A0101,HLA-A02… 0.2295,0.1438,… 0.0412,0,1e-0… 2.2565,55.…
## 2 AKSHNITLIW       HLA-A0101,HLA-A02… 0.2395,0.1924,… 0.0048,1e-04,… 7.3782,47.…
## 3 ALRITFGGP        HLA-A0101,HLA-A02… 0.0843,0.1071,… 0,9e-04,0.004… 73.4615,18…
## 4 APGQTGNIA        HLA-A0101,HLA-A02… 0.0906,0.1022,… 5e-04,0,1e-04… 29.3103,52…
## 5 CNDPFLDHK        HLA-A0101,HLA-A02… 0.1508,0.093,0… 0.0105,0,1e-0… 4.7481,63.…
## 6 CNDPFLDHKN       HLA-A0101,HLA-A02… 0.0855,0.0757,… 2e-04,0,0,0,0… 46,90,95,9…
## # … with 8 more variables: MT_BA-score <chr>, MT_BA_Rank <chr>, MT_Ave <chr>,
## #   MT_Binder <chr>, MT_nM_41 <chr>, Predicted_Binding <chr>, MT_nM <chr>,
## #   MT_Aff_Binder <chr>
OMICRON_PEPTIDES_BINDING_SCORES %>% nrow
## [1] 81

For 9/10mers of the omicron mutants, what are dominant HLAs?

  • HLA-Bs and HLA-Cs.
  • only 7 bind A0201
OMICRON_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(VariantAlignment)) %>% filter(Length%in% c(9,10))%>% select(VariantAlignment, MT_Predicted_Binding, MT_Binder,Length) %>% separate_rows_(cols=c("MT_Predicted_Binding", "MT_Binder"), sep=",") %>% distinct() %>% group_by(MT_Predicted_Binding,MT_Binder,Length) %>%mutate(Length = as.character(Length))%>%
    dplyr::summarise(n=n())%>% filter(MT_Binder == "BINDER")%>% arrange(desc(n))%>%
    ggbarplot(x="MT_Predicted_Binding",y="n",fill="Length",position = position_dodge())+coord_flip()
## `summarise()` has grouped output by 'MT_Predicted_Binding', 'MT_Binder'. You can override using the `.groups` argument.

OMICRON_PEPTIDES_BINDING_SCORES%>% mutate(Length=nchar(VariantAlignment)) %>% filter(Length%in% c(9,10))%>% select(VariantAlignment, MT_Predicted_Binding, MT_Binder,Length) %>% separate_rows_(cols=c("MT_Predicted_Binding", "MT_Binder"), sep=",") %>% distinct() %>% filter(MT_Predicted_Binding == "HLA-A0201")%>% group_by(MT_Predicted_Binding,MT_Binder,Length) %>%mutate(Length = as.character(Length))%>%
    dplyr::summarise(n=n())%>% filter(MT_Binder == "BINDER")%>% arrange(desc(n))
## `summarise()` has grouped output by 'MT_Predicted_Binding', 'MT_Binder'. You can override using the `.groups` argument.
## # A tibble: 2 × 4
## # Groups:   MT_Predicted_Binding, MT_Binder [1]
##   MT_Predicted_Binding MT_Binder Length     n
##   <chr>                <chr>     <chr>  <int>
## 1 HLA-A0201            BINDER    9          7
## 2 HLA-A0201            BINDER    10         2

Read in mutations

MUTATIONS = readRDS("OMICRON_EPITOPE_MUTATIONS.rds")
MUTATIONS %>% mutate(Length = nchar(Peptide))%>% filter(Protein == 'surface glycoprotein') %>% filter(Length== 9)%>% nrow
## [1] 28
#MUTATIONS=MUTATIONS %>% group_by(Peptide, VariantAlignment,start_pos,Mutation) %>% dplyr::summarise(Protein = paste0(Protein, collapse = ","))

join all

MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES)%>% nrow
## Joining, by = "Peptide"
## [1] 80
MUTATIONS %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES) %>% nrow
## Joining, by = "VariantAlignment"
## [1] 80
FULL_MUTATIONS_DT=MUTATIONS %>% inner_join(WUHAN_PEPTIDES_BINDING_SCORES) %>% inner_join(OMICRON_PEPTIDES_BINDING_SCORES)
## Joining, by = "Peptide"
## Joining, by = c("VariantAlignment", "Predicted_Binding")
#FULL_MUTATIONS_DT %>% head %>% DT::datatable()

#saveRDS(FULL_MUTATIONS_DT,file="FULL_MUTATIONS_BINDING.rds")
# mUTATED+HOMOLOGOUS oeptide
FULL_MUTATIONS_DT %>% filter(Peptide %in% "WLLWPVTLA")
## # A tibble: 1 × 32
##   Peptide   VariantAlignment start_pos Protein      MutationPos Mutation Hamming
##   <chr>     <chr>                <int> <chr>        <chr>       <chr>      <dbl>
## 1 WLLWPVTLA WLLWPVTLT               55 membrane gl… 63          A63T           1
## # … with 25 more variables: Predicted_Binding <chr>, Thalf(h) <chr>,
## #   EL-score <chr>, EL_Rank <chr>, BA-score <chr>, BA_Rank <chr>, Ave <chr>,
## #   Binder <chr>, nM_41 <chr>, nM <chr>, Aff_Binder <chr>, Length <int>,
## #   HydrophobicCount <int>, HydroFraction <dbl>, MT_Predicted_Binding <chr>,
## #   MT_Thalf(h) <chr>, MT_EL-score <chr>, MT_EL_Rank <chr>, MT_BA-score <chr>,
## #   MT_BA_Rank <chr>, MT_Ave <chr>, MT_Binder <chr>, MT_nM_41 <chr>,
## #   MT_nM <chr>, MT_Aff_Binder <chr>
FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment) %>% distinct()
## # A tibble: 80 × 2
##    Peptide    VariantAlignment
##    <chr>      <chr>           
##  1 AEHVNNSY   AEYVNNSY        
##  2 AKSHNIALIW AKSHNITLIW      
##  3 APGQTGKIA  APGQTGNIA       
##  4 APRITFGGP  ALRITFGGP       
##  5 CNDPFLGVY  CNDPFLDHK       
##  6 CNDPFLGVYY CNDPFLDHKN      
##  7 DGVYFASTEK DGVYFASIEK      
##  8 DSKVGGNYNY DSKVSGNYNY      
##  9 EELKKLLEQW EELKKLLEEW      
## 10 FCNDPFLGVY FCNDPFLDHK      
## # … with 70 more rows
FULL_MUTATIONS_DT %>% nrow
## [1] 80
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, EL_Rank, MT_EL_Rank, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","EL_Rank","MT_EL_Rank","Binder","MT_Binder"),sep=",")


DATA_FOR_ANALYSIS %>% nrow
## [1] 5120

Loss / gain plots for HLA A/B peptides

  • A/B asupertypes were mapped as of https://bmcimmunol.biomedcentral.com/articles/10.1186/1471-2172-9-1#Abs1
  • HLA C were inferred by using the representitive
  • Try HLA A/B first
  • Problem is 1) low sample number, and 2) not sure if just essentially plotting the distribution of binding in thw dataset. Some HLA alleles (captued by supertypes), will bind these peptides more, thus potentially captured in loss/gain more.
  • Need to think through more.
AB_SUPERTYPES=fread("HLA_AB_SUPERTYPES.csv") %>% mutate(Allele = gsub("\\*","",Allele))%>% mutate(Allele = paste0("HLA-",Allele))%>% dplyr::rename(HLA_Allele = Allele)

DATA_FOR_ANALYSIS%>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% nrow
## [1] 3600
HLA_A_B_DATA_FOR_ANALYSIS =DATA_FOR_ANALYSIS%>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(EL_Rank = as.numeric(EL_Rank), MT_EL_Rank=as.numeric(MT_EL_Rank))

HLA_A_B_DATA_FOR_ANALYSIS=HLA_A_B_DATA_FOR_ANALYSIS%>% mutate(LossGain = case_when(
        (Binder == "BINDER" & MT_Binder == "NONBINDER") ~ "Loss",
        (Binder == "NONBINDER" & MT_Binder == "NONBINDER") ~ "Same_NonBinder",
        (Binder == "NONBINDER" & MT_Binder == "BINDER") ~ "Gain",
        (Binder == "BINDER" & MT_Binder == "BINDER") ~ "Same_Binder"
))

HLA_A_B_DATA_FOR_ANALYSIS %>% filter(LossGain %in% c("Loss","Gain"))%>%group_by(HLA_Allele, LossGain) %>% dplyr::summarise(n=n())%>% inner_join(AB_SUPERTYPES)%>%
        ggboxplot(x="LossGain",y="n",facet.by = "Supertype")+stat_compare_means(label="p.signif")
## `summarise()` has grouped output by 'HLA_Allele'. You can override using the `.groups` argument.
## Joining, by = "HLA_Allele"
## Warning: Computation failed in `stat_compare_means()`:
## argument "x" is missing, with no default

Agretopicities grouped by sipertype

  • HLA C inferred
DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",")%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)

#DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)%>%
#        ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+rotate_x_text()
# without hlac
HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES) %>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
        mutate(Supertype = gsub("HLA\\-","",Supertype))%>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%
        ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%arrange((HLA_Allele))%>%
        ggboxplot(x="HLA_Allele",y="Agretopicity",add="jitter")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>%
        ggpaired(cond1 = "nM_41", cond2 = "MT_nM_41",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")

spike only?

DATA_FOR_ANALYSIS=FULL_MUTATIONS_DT %>% filter(Protein == "surface glycoprotein") %>% select(Peptide, VariantAlignment, Predicted_Binding, nM_41, MT_nM_41, Binder,MT_Binder) %>% separate_rows_(cols = c("Predicted_Binding","nM_41","MT_nM_41","Binder","MT_Binder"),sep=",")%>% mutate(nM_41 = as.numeric(nM_41), MT_nM_41=as.numeric(MT_nM_41))%>% mutate(Agretopicity = MT_nM_41/nM_41)

DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES)%>%
        ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+rotate_x_text()
## Joining, by = "HLA_Allele"

# without hlac
HLA_A_B_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T)) %>% inner_join(AB_SUPERTYPES) %>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))
## Joining, by = "HLA_Allele"
HLA_CDATASET_AGRETOPICITY=DATA_FOR_ANALYSIS %>% dplyr::rename(HLA_Allele=Predicted_Binding)%>% filter(!HLA_Allele %in% grep("HLA-A|HLA-B",HLA_Allele,value = T))%>% mutate(Supertype = str_extract(HLA_Allele,"HLA-C[0-9][0-9]"))%>%
        mutate(Supertype = gsub("HLA\\-","",Supertype))%>%
filter(! (Binder == "NONBINDER" & MT_Binder == 'NONBINDER'))

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%
        ggboxplot(x="Supertype",y="Agretopicity")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>%arrange((HLA_Allele))%>%
        ggboxplot(x="HLA_Allele",y="Agretopicity",add="jitter")+scale_y_log10()+geom_hline(yintercept = 1,linetype="dashed")+rotate_x_text()

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
        pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
        group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>% mutate(Supertype = forcats::fct_inorder(Supertype))%>%
        ggbarplot(x="Supertype",y="n",fill="name",position = position_dodge())+rotate_x_text(angle=90)+ylab("# peptides w/ mutations in Omicron predicted to bind")
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.

spike only

HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>%
        ggpaired(cond1 = "nM_41", cond2 = "MT_nM_41",line.color = "gray")+facet_wrap(~Supertype,ncol=9)+scale_y_log10()+stat_compare_means(label = "p.signif")+rotate_x_text(angle=45)+ylab("Binding Affinity (nM)")

BA1_SUPERTYPE_BOXPLT=HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Supertype %in% c("A02","A03","B07","C01"))%>%
        dplyr::rename(Wuhan=nM_41, Omicron = MT_nM_41)%>%
        ggpaired(cond1 = "Wuhan", cond2 = "Omicron",line.color = "gray")+facet_wrap(~Supertype,nrow=1)+theme_pubr(base_size = 16)+scale_y_log10()+stat_compare_means(label = "p.signif",label.x.npc = "center")+rotate_x_text(angle=45)+ylab("Log10 Binding Affinity (nM)")

BA1_SUPERTYPE_BOXPLT

saveRDS(BA1_SUPERTYPE_BOXPLT,file="BA1_SUPERTYPE_BOXPLT.rds")
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY)%>% filter(Supertype == 'B07') %>% arrange(desc(Agretopicity))
## # A tibble: 26 × 9
##    Peptide    VariantAlignment HLA_Allele   nM_41 MT_nM_41 Binder MT_Binder
##    <chr>      <chr>            <chr>        <dbl>    <dbl> <chr>  <chr>    
##  1 SPRRARSVA  SHRRARSVA        HLA-B0702     4.65    4722. BINDER NONBINDER
##  2 SPRRARSVA  SHRRARSVA        HLA-B4201     8.90    6169. BINDER NONBINDER
##  3 SPRRARSV   SHRRARSV         HLA-B0702    24.4    12722. BINDER NONBINDER
##  4 SPRRARSV   SHRRARSV         HLA-B4201    38.6    11617. BINDER NONBINDER
##  5 FCNDPFLGVY FCNDPFLDHK       HLA-B3501   991.     38357. BINDER NONBINDER
##  6 VTWFHAIHV  VTWFHVISG        HLA-B5101  7776.     30330. BINDER NONBINDER
##  7 VGYQPYRVV  VGHQPYRVV        HLA-B5101  3840.      9665. BINDER NONBINDER
##  8 NGVEGFNCY  NGVAGFNCY        HLA-B3501   347.       639. BINDER BINDER   
##  9 YGFQPTNGV  YSFRPTYGV        HLA-B5101  3488.      5066. BINDER BINDER   
## 10 SVLYNSASF  SVLYNLAPF        HLA-B3501   153.       191. BINDER BINDER   
## # … with 16 more rows, and 2 more variables: Agretopicity <dbl>,
## #   Supertype <chr>
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
        pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
        group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>%filter(Supertype == 'B07')%>% arrange((n))%>%
  mutate(pct_change = (n/lead(n) -1) * 100)
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.
## # A tibble: 2 × 5
##   Supertype name      value      n pct_change
##   <chr>     <chr>     <chr>  <int>      <dbl>
## 1 B07       MT_Binder BINDER    19      -17.4
## 2 B07       WT_Binder BINDER    23       NA
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
        pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
        group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>%filter(Supertype == 'A01')%>% arrange((n))%>%
  mutate(pct_change = (n/lead(n) -1) * 100)
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.
## # A tibble: 2 × 5
##   Supertype name      value      n pct_change
##   <chr>     <chr>     <chr>  <int>      <dbl>
## 1 A01       MT_Binder BINDER    39      -11.4
## 2 A01       WT_Binder BINDER    44       NA
HLA_A_B_AGRETOPICITY %>% rbind(HLA_CDATASET_AGRETOPICITY) %>% dplyr::rename(WT_Binder=Binder)%>%
        pivot_longer(cols = c(WT_Binder, MT_Binder))%>%
        group_by(Supertype, name,value) %>% dplyr::summarise(n=n())%>% filter(value == "BINDER")%>% ungroup%>% arrange(desc(n))%>% mutate(Supertype = forcats::fct_inorder(Supertype))%>%
        ggbarplot(x="Supertype",y="n",fill="name",position = position_dodge())+rotate_x_text(angle=90)+ylab("# peptides w/ mutations in Omicron predicted to bind")
## `summarise()` has grouped output by 'Supertype', 'name'. You can override using the `.groups` argument.